Normalize reads and create DGE objects

library(tidyverse)
Registered S3 method overwritten by 'dplyr':
  method           from
  print.rowwise_df     
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.2.1     ✓ purrr   0.3.3
✓ tibble  2.1.3     ✓ dplyr   0.8.3
✓ tidyr   1.0.0     ✓ stringr 1.4.0
✓ readr   1.3.1     ✓ forcats 0.4.0
── Conflicts ───────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(edgeR)
Loading required package: limma

get reads and create data frame

files <- dir("../input/Kallisto_output/", include.dirs = TRUE)
files %>% head()
[1] "1a1_q_002_S1_R1_001" "1a2_q_003_S2_R1_001" "1a3_q_004_S3_R1_001"
[4] "1a4_q_005_S4_R1_001" "1a5_q_006_S5_R1_001" "1a6_q_007_S6_R1_001"
counts.list <- map(files, ~ read_tsv(
  file=file.path("..","input","Kallisto_output",.,"abundance.tsv"),
  col_types = "cdddd"))
names(counts.list) <- files
counts <- sapply(counts.list, select, est_counts) %>% 
  bind_cols(counts.list[[1]][,"target_id"],.)
counts[is.na(counts)] <- 0
colnames(counts) <- sub(".est_counts","",colnames(counts),fixed = TRUE)
counts
write_csv(counts,"../output/2018-timecourse_V3.0_raw_counts_.csv.gz")

create sample description data frame

key <- readxl::read_excel("../input/tube_no_legend_time_course_2018.xlsx",
                          na=c("","na"),
                          col_types=c("text", "text", "text", "skip", "text", "skip", "skip", "skip", "text", "text", "text", "skip", "text", "skip", "skip", "date", "date"))  %>%
  mutate(sampling_time_specific=format(sampling_time_specific, format="%H:%M:%S"))
key

create reformatted tube_no

key <- key %>%
  mutate(tube_no_2 = {
    tolower(tube_no) %>%
      str_replace("q_([1-9](_|$))", "q_00\\1") %>%
      str_replace("q_([1-9][0-9](_|$))", "q_0\\1") }) %>%
  select(tube_no, tube_no_2, everything())
key
samples <- tibble(
  file=files,
  tube_no_2 = str_extract(files, pattern = "q_[0-9]{3}(_d8)?") 
)
samples
samples <- left_join(samples, key) 
Joining, by = "tube_no_2"
samples <- samples %>%
  mutate(sampling_day=str_pad(sampling_day,width=2,side = "left",pad="0")) %>%
  mutate(group=str_c(genotype, soil_trt, sampling_day, sampling_time,sep="-"))
samples
counts2 <- counts %>% 
  as.data.frame() %>% 
  column_to_rownames(var = "target_id") %>%
  as.matrix() %>%
  round(0)
samples2 <- samples %>%
  select(-tube_no_2, -tube_no, -pot, -plant_no, -sampling_day_specific, -sampling_time_specific) %>%
  as.data.frame() %>%
  column_to_rownames(var="file")
dge <- DGEList(counts=counts2, samples=samples2, group=samples2$group)

normalize

dge <- calcNormFactors(dge)
save(dge, file="../output/timecourseDGE.Rdata")

save some formatted cpm files

load("../output/timecourseDGE.Rdata")

cpm averaged for each sample type

log2cpmGroup <- cpmByGroup(dge, log = TRUE)
dim(log2cpmGroup)
[1] 46250    48
head(log2cpm[,1:10])
                 FPsc-ATM_BLANK-02-2_afternoon FPsc-ATM_BLANK-03-1_morn
BraA01g000010.3C                     0.1228981                0.1855259
BraA01g000020.3C                     4.5040649                4.1498411
BraA01g000030.3C                    -1.3215286               -0.6842086
BraA01g000040.3C                     6.0727831                5.9570869
BraA01g000050.3C                     2.0488744                1.5780617
BraA01g000060.3C                    -1.3241428               -1.4374041
                 FPsc-ATM_BLANK-03-2_afternoon FPsc-ATM_BLANK-03-3_evening_5.30
BraA01g000010.3C                    -0.2656371                       -0.3678073
BraA01g000020.3C                     4.2173450                        4.0387983
BraA01g000030.3C                    -0.4147637                       -0.3646628
BraA01g000040.3C                     6.0349576                        5.9312680
BraA01g000050.3C                     1.5568859                        1.9381266
BraA01g000060.3C                    -1.4374041                       -1.4374041
                 FPsc-ATM_BLANK-03-4_night_1 FPsc-ATM_BLANK-03-5_night_2 FPsc-ATM_BLANK-04-1_morn
BraA01g000010.3C                  -0.4365975                  -0.6444735               -0.8937118
BraA01g000020.3C                   4.0717934                   4.0001420                4.2911430
BraA01g000030.3C                  -1.1149994                  -0.3966245               -0.5600769
BraA01g000040.3C                   5.7669761                   5.8282151                5.9890498
BraA01g000050.3C                   1.7998456                   1.8459871                1.6437956
BraA01g000060.3C                  -1.4374041                  -1.4374041               -1.4374041
                 FPsc-ATM_BLANK-04-2_afternoon FPsc-ATM_BLANK-04-3_evening_5.30
BraA01g000010.3C                     0.3708927                       -0.5266988
BraA01g000020.3C                     3.9806886                        4.2305824
BraA01g000030.3C                    -0.8354973                       -0.8281699
BraA01g000040.3C                     5.9081725                        5.7867749
BraA01g000050.3C                     1.5984162                        1.8688470
BraA01g000060.3C                    -1.4374041                       -1.4374041
                 FPsc-ATM_BLANK-04-4_night_1
BraA01g000010.3C                 -0.03054253
BraA01g000020.3C                  3.91593441
BraA01g000030.3C                 -0.75851324
BraA01g000040.3C                  5.91802906
BraA01g000050.3C                  1.90403532
BraA01g000060.3C                 -1.43740410
write.csv(log2cpmGroup, "../output/log2cpmGroup.csv.gz")

cpm for each individual sample

samplenames <- str_c(dge$samples$group,"_blk",dge$samples$block)
log2cpmSample <- cpmByGroup(dge, log = TRUE, group=samplenames)
dim(log2cpmSample)
[1] 46250   288
head(log2cpmSample[,1:10])
                 FPsc-ATM_BLANK-02-2_afternoon_blk1 FPsc-ATM_BLANK-02-2_afternoon_blk2
BraA01g000010.3C                          0.3400216                           0.384717
BraA01g000020.3C                          4.5764433                           4.411531
BraA01g000030.3C                         -1.4374041                          -1.437404
BraA01g000040.3C                          6.0943903                           6.243121
BraA01g000050.3C                          1.9168970                           1.595641
BraA01g000060.3C                         -1.4374041                          -1.437404
                 FPsc-ATM_BLANK-02-2_afternoon_blk3 FPsc-ATM_BLANK-02-2_afternoon_blk4
BraA01g000010.3C                         -1.4374041                         0.09091967
BraA01g000020.3C                          4.7686054                         4.37711591
BraA01g000030.3C                         -1.4374041                        -0.73415423
BraA01g000040.3C                          5.9496220                         6.04456997
BraA01g000050.3C                          2.6011030                         1.75891783
BraA01g000060.3C                         -0.8695907                        -1.43740410
                 FPsc-ATM_BLANK-02-2_afternoon_blk5 FPsc-ATM_BLANK-02-2_afternoon_blk6
BraA01g000010.3C                        -0.03366926                          0.5637431
BraA01g000020.3C                         4.64860469                          4.1661298
BraA01g000030.3C                        -1.43740410                         -1.4374041
BraA01g000040.3C                         6.03998090                          6.0467807
BraA01g000050.3C                         2.49466558                          1.6145669
BraA01g000060.3C                        -1.43740410                         -1.4374041
                 FPsc-ATM_BLANK-03-1_morn_blk1 FPsc-ATM_BLANK-03-1_morn_blk2
BraA01g000010.3C                     0.5251546                    -0.5241447
BraA01g000020.3C                     3.9196502                     3.8473212
BraA01g000030.3C                    -1.4374041                    -0.2202086
BraA01g000040.3C                     5.8183833                     6.0247640
BraA01g000050.3C                     1.8394763                     1.9236065
BraA01g000060.3C                    -1.4374041                    -1.4374041
                 FPsc-ATM_BLANK-03-1_morn_blk3 FPsc-ATM_BLANK-03-1_morn_blk4
BraA01g000010.3C                     0.3096726                      1.115971
BraA01g000020.3C                     4.3493457                      3.695601
BraA01g000030.3C                    -0.8802583                     -1.437404
BraA01g000040.3C                     5.8443365                      5.830659
BraA01g000050.3C                     0.9527679                      1.834303
BraA01g000060.3C                    -1.4374041                     -1.437404
write.csv(log2cpmSample, "../output/log2cpmSample.csv.gz")
LS0tCnRpdGxlOiAiMDJfTm9ybWFsaXplX1JlYWRzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpOb3JtYWxpemUgcmVhZHMgYW5kIGNyZWF0ZSBER0Ugb2JqZWN0cwoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGVkZ2VSKQpgYGAKCiMjIGdldCByZWFkcyBhbmQgY3JlYXRlIGRhdGEgZnJhbWUKCmBgYHtyfQpmaWxlcyA8LSBkaXIoIi4uL2lucHV0L0thbGxpc3RvX291dHB1dC8iLCBpbmNsdWRlLmRpcnMgPSBUUlVFKQpmaWxlcyAlPiUgaGVhZCgpCmBgYAoKYGBge3J9CmNvdW50cy5saXN0IDwtIG1hcChmaWxlcywgfiByZWFkX3RzdigKICBmaWxlPWZpbGUucGF0aCgiLi4iLCJpbnB1dCIsIkthbGxpc3RvX291dHB1dCIsLiwiYWJ1bmRhbmNlLnRzdiIpLAogIGNvbF90eXBlcyA9ICJjZGRkZCIpKQpuYW1lcyhjb3VudHMubGlzdCkgPC0gZmlsZXMKYGBgCgpgYGB7cn0KY291bnRzIDwtIHNhcHBseShjb3VudHMubGlzdCwgc2VsZWN0LCBlc3RfY291bnRzKSAlPiUgCiAgYmluZF9jb2xzKGNvdW50cy5saXN0W1sxXV1bLCJ0YXJnZXRfaWQiXSwuKQpjb3VudHNbaXMubmEoY291bnRzKV0gPC0gMApjb2xuYW1lcyhjb3VudHMpIDwtIHN1YigiLmVzdF9jb3VudHMiLCIiLGNvbG5hbWVzKGNvdW50cyksZml4ZWQgPSBUUlVFKQpjb3VudHMKYGBgCgpgYGB7cn0Kd3JpdGVfY3N2KGNvdW50cywiLi4vb3V0cHV0LzIwMTgtdGltZWNvdXJzZV9WMy4wX3Jhd19jb3VudHNfLmNzdi5neiIpCmBgYAoKIyMgY3JlYXRlIHNhbXBsZSBkZXNjcmlwdGlvbiBkYXRhIGZyYW1lCgpgYGB7cn0Ka2V5IDwtIHJlYWR4bDo6cmVhZF9leGNlbCgiLi4vaW5wdXQvdHViZV9ub19sZWdlbmRfdGltZV9jb3Vyc2VfMjAxOC54bHN4IiwKICAgICAgICAgICAgICAgICAgICAgICAgICBuYT1jKCIiLCJuYSIpLAogICAgICAgICAgICAgICAgICAgICAgICAgIGNvbF90eXBlcz1jKCJ0ZXh0IiwgInRleHQiLCAidGV4dCIsICJza2lwIiwgInRleHQiLCAic2tpcCIsICJza2lwIiwgInNraXAiLCAidGV4dCIsICJ0ZXh0IiwgInRleHQiLCAic2tpcCIsICJ0ZXh0IiwgInNraXAiLCAic2tpcCIsICJkYXRlIiwgImRhdGUiKSkgICU+JQogIG11dGF0ZShzYW1wbGluZ190aW1lX3NwZWNpZmljPWZvcm1hdChzYW1wbGluZ190aW1lX3NwZWNpZmljLCBmb3JtYXQ9IiVIOiVNOiVTIikpCmtleQpgYGAKCmNyZWF0ZSByZWZvcm1hdHRlZCB0dWJlX25vIApgYGB7cn0Ka2V5IDwtIGtleSAlPiUKICBtdXRhdGUodHViZV9ub18yID0gewogICAgdG9sb3dlcih0dWJlX25vKSAlPiUKICAgICAgc3RyX3JlcGxhY2UoInFfKFsxLTldKF98JCkpIiwgInFfMDBcXDEiKSAlPiUKICAgICAgc3RyX3JlcGxhY2UoInFfKFsxLTldWzAtOV0oX3wkKSkiLCAicV8wXFwxIikgfSkgJT4lCiAgc2VsZWN0KHR1YmVfbm8sIHR1YmVfbm9fMiwgZXZlcnl0aGluZygpKQprZXkKYGBgCgpgYGB7cn0Kc2FtcGxlcyA8LSB0aWJibGUoCiAgZmlsZT1maWxlcywKICB0dWJlX25vXzIgPSBzdHJfZXh0cmFjdChmaWxlcywgcGF0dGVybiA9ICJxX1swLTldezN9KF9kOCk/IikgCikKc2FtcGxlcwpgYGAKCmBgYHtyfQpzYW1wbGVzIDwtIGxlZnRfam9pbihzYW1wbGVzLCBrZXkpIApzYW1wbGVzIDwtIHNhbXBsZXMgJT4lCiAgbXV0YXRlKHNhbXBsaW5nX2RheT1zdHJfcGFkKHNhbXBsaW5nX2RheSx3aWR0aD0yLHNpZGUgPSAibGVmdCIscGFkPSIwIikpICU+JQogIG11dGF0ZShncm91cD1zdHJfYyhnZW5vdHlwZSwgc29pbF90cnQsIHNhbXBsaW5nX2RheSwgc2FtcGxpbmdfdGltZSxzZXA9Ii0iKSkKc2FtcGxlcwpgYGAKCmBgYHtyfQpjb3VudHMyIDwtIGNvdW50cyAlPiUgCiAgYXMuZGF0YS5mcmFtZSgpICU+JSAKICBjb2x1bW5fdG9fcm93bmFtZXModmFyID0gInRhcmdldF9pZCIpICU+JQogIGFzLm1hdHJpeCgpICU+JQogIHJvdW5kKDApCmBgYAoKYGBge3J9CnNhbXBsZXMyIDwtIHNhbXBsZXMgJT4lCiAgc2VsZWN0KC10dWJlX25vXzIsIC10dWJlX25vLCAtcG90LCAtcGxhbnRfbm8sIC1zYW1wbGluZ19kYXlfc3BlY2lmaWMsIC1zYW1wbGluZ190aW1lX3NwZWNpZmljKSAlPiUKICBhcy5kYXRhLmZyYW1lKCkgJT4lCiAgY29sdW1uX3RvX3Jvd25hbWVzKHZhcj0iZmlsZSIpCmBgYAoKYGBge3J9CmRnZSA8LSBER0VMaXN0KGNvdW50cz1jb3VudHMyLCBzYW1wbGVzPXNhbXBsZXMyLCBncm91cD1zYW1wbGVzMiRncm91cCkKYGBgCgojIyBub3JtYWxpemUKCmBgYHtyfQpkZ2UgPC0gY2FsY05vcm1GYWN0b3JzKGRnZSkKYGBgCgpgYGB7cn0Kc2F2ZShkZ2UsIGZpbGU9Ii4uL291dHB1dC90aW1lY291cnNlREdFLlJkYXRhIikKYGBgCgojIyBzYXZlIHNvbWUgZm9ybWF0dGVkIGNwbSBmaWxlcwoKYGBge3J9CmxvYWQoIi4uL291dHB1dC90aW1lY291cnNlREdFLlJkYXRhIikKYGBgCgpgYGB7cn0KZGdlJHNhbXBsZQpgYGAKCmNwbSBhdmVyYWdlZCBmb3IgZWFjaCBzYW1wbGUgdHlwZQpgYGB7cn0KbG9nMmNwbUdyb3VwIDwtIGNwbUJ5R3JvdXAoZGdlLCBsb2cgPSBUUlVFKQpkaW0obG9nMmNwbUdyb3VwKQpoZWFkKGxvZzJjcG1bLDE6MTBdKQpgYGAKCmBgYHtyfQp3cml0ZS5jc3YobG9nMmNwbUdyb3VwLCAiLi4vb3V0cHV0L2xvZzJjcG1Hcm91cC5jc3YuZ3oiKQpgYGAKCmNwbSBmb3IgZWFjaCBpbmRpdmlkdWFsIHNhbXBsZQpgYGB7cn0Kc2FtcGxlbmFtZXMgPC0gc3RyX2MoZGdlJHNhbXBsZXMkZ3JvdXAsIl9ibGsiLGRnZSRzYW1wbGVzJGJsb2NrKQpsb2cyY3BtU2FtcGxlIDwtIGNwbUJ5R3JvdXAoZGdlLCBsb2cgPSBUUlVFLCBncm91cD1zYW1wbGVuYW1lcykKZGltKGxvZzJjcG1TYW1wbGUpCmhlYWQobG9nMmNwbVNhbXBsZVssMToxMF0pCmBgYAoKYGBge3J9CndyaXRlLmNzdihsb2cyY3BtU2FtcGxlLCAiLi4vb3V0cHV0L2xvZzJjcG1TYW1wbGUuY3N2Lmd6IikKYGBg